library(rlang)
#library(SingleR)
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%() masks rlang::%@%()
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ purrr::invoke() masks rlang::invoke()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::splice() masks rlang::splice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
options(future.globals.maxSize = 8000 * 1024^2)
source("../../configuration.R")
obj <- readRDS(STConfig$seurat_object_rds)
df <- read.csv(STConfig$annotation_with_stromal, row.names = 1)
head(df)
#merge in the singleR_stromal_w_megs into obj@metadata
new_df = obj@meta.data
# Check if all row names of df1 are in df2
common_row_names <- intersect(rownames(new_df), rownames(df))
#it's cos they are artefacts - don't worry about them!
#subset new_df to the common row names
new_df = new_df[common_row_names, ]
#subset obj to the common row names
obj = subset(obj, cell_id %in% common_row_names)
anno_df = read.csv(STConfig$pth_cell_annotations_final, row.names = 1)
anno_df = anno_df[common_row_names, ]
#subset to index and obj.anno_3_w_megs
sub_anno_df = anno_df[, c("obj.anno_3_w_megs_w_stromal","obj.anno_3_w_megs", "obj.anno_1_w_megs", "obj.anno_5_w_megs")]
head(sub_anno_df)
#merge in the singleR_stromal_w_megs into obj@metadata
library(dplyr)
df1 <- new_df %>% tibble::rownames_to_column(var = "Index")
df2 <- sub_anno_df %>% tibble::rownames_to_column(var = "Index")
# Merge by the Index column
merged_df <- df1 %>%
left_join(df2, by = "Index") %>%
tibble::column_to_rownames(var = "Index") # Restore row names
# Check the result
head(merged_df)
all(rownames(merged_df) == rownames(obj@meta.data))
## [1] TRUE
obj$obj.anno_3_w_megs = merged_df$obj.anno_3_w_megs
obj$obj.anno_3_w_megs_w_stromal = merged_df$obj.anno_3_w_megs_w_stromal
obj$obj.anno_1_w_megs = merged_df$obj.anno_1_w_megs
obj$obj.anno_5_w_megs = merged_df$obj.anno_5_w_megs
#downsample to 200 cells
Idents(obj) = obj$sample_id
#ds_obj = subset(obj, downsample = 10000)
rm(df, df1, df2, new_df, merged_df)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 4151663 221.8 22304570 1191.2 15031563 802.8
## Vcells 426734592 3255.8 1165898015 8895.1 1160479646 8853.8
`%!in%` = Negate(`%in%`)
unique(obj$obj.anno_5_w_megs)
## [1] "MNP" "Immature_myeloid" "Stromal" "Myeloid"
## [5] "Lymphocyte" "Erythroid" "HSPC" "Endothelial"
## [9] "Megakaryocyte" "Granulocyte/mast" "CD69"
obj = subset(obj, subset = obj.anno_5_w_megs %!in% c("CD69"))
Idents(obj) = obj$obj.anno_3_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_3_w_megs", label = TRUE, repel = TRUE,size=0.05) +
NoLegend() +
theme(
axis.title = element_blank(), # Removes axis titles
axis.text = element_blank(), # Removes axis text
axis.ticks = element_blank(), # Removes axis ticks
axis.line = element_blank(), # Removes axis lines
panel.grid = element_blank() # Removes grid lines if present
) +
ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image
Idents(obj) = obj$obj.anno_3_w_megs_w_stromal
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_3_w_megs_w_stromal", label = TRUE, repel = TRUE, ) +
NoLegend() +
theme(
axis.title = element_blank(), # Removes axis titles
axis.text = element_blank(), # Removes axis text
axis.ticks = element_blank(), # Removes axis ticks
axis.line = element_blank(), # Removes axis lines
panel.grid = element_blank() # Removes grid lines if present
) +
ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
print(image)
Idents(obj) = obj$obj.anno_1_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_1_w_megs", label = TRUE, repel = TRUE, ) +
NoLegend() +
theme(
axis.title = element_blank(), # Removes axis titles
axis.text = element_blank(), # Removes axis text
axis.ticks = element_blank(), # Removes axis ticks
axis.line = element_blank(), # Removes axis lines
panel.grid = element_blank() # Removes grid lines if present
) +
ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image
Idents(obj) = obj$obj.anno_5_w_megs
#now make a dimplot of each level and a proportion table
image = DimPlot(obj, group.by = "obj.anno_5_w_megs", label = TRUE, repel = TRUE, ) +
NoLegend() +
theme(
axis.title = element_blank(), # Removes axis titles
axis.text = element_blank(), # Removes axis text
axis.ticks = element_blank(), # Removes axis ticks
axis.line = element_blank(), # Removes axis lines
panel.grid = element_blank() # Removes grid lines if present
) +
ggtitle("")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
image
DEG Heatmap
x <- unique(obj$obj.anno_3_w_megs_w_stromal)
#do differential expression for each cluster
Idents(obj) = obj$obj.anno_3_w_megs_w_stromal
pbmc.markers <- FindAllMarkers(obj, only.pos = TRUE)
## Calculating cluster DC
## Calculating cluster GMP
## Calculating cluster Macrophage
## Calculating cluster Monocyte
## Calculating cluster Adipo-MSC
## Calculating cluster Stromal
## Calculating cluster Myeloid
## Calculating cluster Plasma_cell
## Calculating cluster Erythroid
## Calculating cluster T_cell
## Calculating cluster HSPC
## Calculating cluster Osteo-MSC
## Calculating cluster B_cell
## Calculating cluster Endothelial
## Calculating cluster SMC
## Calculating cluster Megakaryocyte
## Calculating cluster Granulocyte/mast
pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1)
pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 10) %>%
ungroup() -> top10
p <- top10 %>%
mutate(cluster=factor(cluster, levels = x)) %>%
arrange(cluster)
p
p$cluster = factor(p$cluster, levels = unique(obj$obj.anno_3_w_megs_w_stromal))
avgexp = AggregateExpression(obj, return.seurat = T)
## Names of identity class contain underscores ('_'), replacing with dashes ('-')
## This message is displayed once every 8 hours.
## Centering and scaling data matrix
image = DoHeatmap(avgexp, features = p$gene, size = 2, draw.lines = FALSE) + scale_fill_gradientn(colours = c("blue", "white", "red"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
plot(image)
## Warning: Removed 121 rows containing missing values or values outside the scale range
## (`geom_point()`).
#featureplot
image <- FeaturePlot(
obj,
features = c("CD3D", "CD3E", "CD79A", "CD14", "FCGR3A", "GYPA", "VWF", "PECAM1", "CD34", "CTSG", "KIT", "PDGFRA"),
ncol = 3
)
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
print(image)